#description(zone-days-start-end-files-hours of speech)
zone_list<-unique(Bpc_freq$zone)
des<-data.frame(zone=zone_list)
for (i in 1:length(zone_list)){
des[i,"ndays"]<-nrow(unique(Bpc_freq[which(Bpc_freq$zone==zone_list[i]),'date']))
des[i,"start"]<-head(unique(Bpc_freq[which(Bpc_freq$zone==zone_list[i]),'ts']),1)
des[i,"end"]<-tail(unique(Bpc_freq[which(Bpc_freq$zone==zone_list[i]),'ts']),1)
des[i,"nfiles"]<-nrow(Bpc_freq[which(Bpc_freq$zone==zone_list[i]),])
des[i,"num_bpc"]<-sum(Bpc_freq[which(Bpc_freq$zone==zone_list[i]),"num_bpc"])
}
print(des)
## zone ndays start end nfiles num_bpc
## 1 Zone 1 360 2018-08-04 06:07:00 2019-08-05 15:26:00 16781 2659392
## 2 Zone 10 358 2018-08-02 23:47:00 2019-07-31 23:18:00 16905 4343509
## 3 Zone 11 357 2018-08-05 01:34:00 2019-07-31 23:05:00 16000 1567307
## 4 Zone 12 356 2018-08-07 05:17:00 2019-07-31 23:19:00 16305 4083850
## 5 Zone 13 357 2018-08-02 23:48:00 2019-07-31 23:18:00 16822 2169841
## 6 Zone 2 351 2018-08-01 01:22:00 2019-07-31 23:17:00 15996 2114226
## 7 Zone 3 183 2018-08-04 06:04:00 2019-02-02 10:22:00 8603 1130190
## 8 Zone 4 354 2018-08-04 06:36:00 2019-07-31 23:11:00 16305 4318849
## 9 Zone 5 186 2018-08-01 10:39:00 2019-02-02 17:30:00 8466 1618740
## 10 Zone 6 239 2018-10-30 09:13:00 2019-06-30 23:38:00 11312 2607037
## 11 Zone 8 356 2018-08-06 06:03:00 2019-07-31 23:04:00 16716 5000410
#plot
#ggplot(data = zone_1, aes(x=ts, y=num_bpc)) + geom_col(position = 'dodge')+theme(axis.text.x = element_text(angle=90,hjust=1))
plot(xts(zone_1[,5],order.by=zone_1[,1]),main="zone 1")
library(TTR)
tss<-ts(zone_1[,5],freq=1800,start=1)
ts_components<-decompose(tss)
plot(ts_components)
ts_seasonally_adjusted<-tss-ts_components$seasonal
plot(ts_seasonally_adjusted)
print(acf(data$num_bpc, lag.max = 20, main="auto-correlation test(20)"))
##
## Autocorrelations of series 'data$num_bpc', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 0.610 0.499 0.440 0.380 0.326 0.279 0.230 0.175 0.125 0.078
## 11 12 13 14 15 16 17 18 19 20
## 0.033 -0.012 -0.037 -0.058 -0.081 -0.089 -0.084 -0.090 -0.091 -0.083
print(pacf(data$num_bpc, lag.max = 20, main="partial auto-correlation test"))
##
## Partial autocorrelations of series 'data$num_bpc', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11
## 0.610 0.202 0.122 0.052 0.022 0.007 -0.013 -0.036 -0.039 -0.041 -0.043
## 12 13 14 15 16 17 18 19 20
## -0.048 -0.021 -0.015 -0.020 -0.002 0.015 -0.003 -0.002 0.007
acf(data$num_bpc, lag.max = 200, main="auto-correlation test(200)")
library(urca)
summary(ur.kpss(data$num_bpc))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 14 lags.
##
## Value of test-statistic is: 8.4413
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# use KPSS test to determine the appropriate number of differences
# nonstationary->stationary
library(forecast)
ndiffs(tss)
## [1] 1
summary(ur.kpss(diff(tss)))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 14 lags.
##
## Value of test-statistic is: 8e-04
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
tss %>% diff %>% ggtsdisplay(main="")
# auto-arima
ts_adj<-diff(tss)
fit<-auto.arima(ts_adj,seasonal = FALSE)
summary(fit)
## Series: ts_adj
## ARIMA(1,0,1) with zero mean
##
## Coefficients:
## ar1 ma1
## 0.2041 -0.6929
## s.e. 0.0156 0.0122
##
## sigma^2 estimated as 3949: log likelihood=-93288.02
## AIC=186582 AICc=186582 BIC=186605.2
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0198953 62.8359 49.05602 NaN Inf 0.6307193 -0.002759664
Yt = c + 0.2041Yt-1 -0.6929 epsilon t-1 + epsilon t
fit %>% forecast(h=10) %>% autoplot(include=80)
checkresiduals(fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1) with zero mean
## Q* = 12500, df = 3354, p-value < 2.2e-16
##
## Model df: 2. Total lags used: 3356
res<-fit$residuals
res2<-res^2
par(mfrow=c(1,2))
acf(as.vector(res2),main="ACF of Squared Residuals")
pacf(as.vector(res2),main="PACF of Squared Residuals")
library(MTS)
archTest(res)
## Q(m) of squared series(LM test):
## Test statistic: 194.1281 p-value: 0
## Rank-based Test:
## Test statistic: 152.2412 p-value: 0
library(rugarch)
spec = ugarchspec()
def.fit=ugarchfit(spec=spec, data=ts_adj)
print(def.fit)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(1,0,1)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 0.041241 0.183852 0.22432 0.822509
## ar1 0.199464 0.015481 12.88444 0.000000
## ma1 -0.691977 0.012091 -57.22840 0.000000
## omega 7.897761 1.705693 4.63023 0.000004
## alpha1 0.005688 0.000447 12.73897 0.000000
## beta1 0.992300 0.000021 46307.37785 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 0.041241 0.121949 3.3819e-01 0.735224
## ar1 0.199464 0.017682 1.1281e+01 0.000000
## ma1 -0.691977 0.013116 -5.2757e+01 0.000000
## omega 7.897761 2.241616 3.5232e+00 0.000426
## alpha1 0.005688 0.000581 9.7894e+00 0.000000
## beta1 0.992300 0.000002 5.8399e+05 0.000000
##
## LogLikelihood : -93204.87
##
## Information Criteria
## ------------------------------------
##
## Akaike 11.110
## Bayes 11.113
## Shibata 11.110
## Hannan-Quinn 11.111
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.02088 0.88511
## Lag[2*(p+q)+(p+q)-1][5] 4.26764 0.03258
## Lag[4*(p+q)+(p+q)-1][9] 9.15467 0.02070
## d.o.f=2
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 71.31 0
## Lag[2*(p+q)+(p+q)-1][5] 84.03 0
## Lag[4*(p+q)+(p+q)-1][9] 87.62 0
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 4.834 0.500 2.000 0.02791
## ARCH Lag[5] 8.724 1.440 1.667 0.01343
## ARCH Lag[7] 9.353 2.315 1.543 0.02596
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 1.9688
## Individual Statistics:
## mu 0.00441
## ar1 0.82383
## ma1 0.57602
## omega 0.43604
## alpha1 0.59209
## beta1 0.53307
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.49 1.68 2.12
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.08140 2.795e-01
## Negative Sign Bias 0.02982 9.762e-01
## Positive Sign Bias 9.19164 4.313e-20 ***
## Joint Effect 180.64764 6.392e-39 ***
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 345.2 9.888e-62
## 2 30 379.0 1.296e-62
## 3 40 414.2 3.136e-64
## 4 50 435.2 2.379e-63
##
##
## Elapsed time : 1.205869
Yt = 0.00441 + 0.82383 R1 + 0.57602 A1
Omega^2 = 0.43604 + 0.59209(At-1)^2 + 0.53307(At-2)^2